*****************************************************************
* Replication directory for                                   ***
* Prime locations                                             ***
* by Gabriel M. Ahlfeldt, Thilo N.H. Albers, Kristian Behrens ***
* Published in American Economic Review: Insights             ***
*****************************************************************
* 01/2025
* Stata
version 17.0

* This do file prepares the grid-level data set that will be used when processing the cluster data to generate PLs
* It insheets the results from the clustering algorithm
* Merges information from the original grids
* It is executed in a loop over three employment types (99 3 5), each time using the preferred p-value

* Generate folder for final storage of the outputs of this do file
	capture mkdir "$dataoutput/USMetroGridEmp"									// capture: no error if the folder already exists

* Use the list of CBSA IDs over which to loop
	qui u "$data_USMETROS/Raw Numeric Data/METRO LIST/METROS.dta", clear 		// Load the metro list
	levelsof  cbsafp, local(USMETROIDS)											// Generate the local containing all distinct CBSA IDs
	* Total number of metros shown in the progress message.
	* It is counted from the ID list itself, so it always equals the number of loop iterations;
	* r(N) after tabulate is the number of observations and overstates the total if an ID is repeated.
	local Ncbsafp : word count `USMETROIDS'

* Begin loop by employment type
* For each employment type, every metro's clustering output is imported, cleaned, merged with
* the metro's grid legend and saved; the metro files are then appended into one file per type.
* Reads the locals USMETROIDS and Ncbsafp and the globals data_USMETROS, SL, temp and dataoutput.
	foreach EmpType of numlist 99  3 5 { 
		* Generate workspace for outputs
		capture mkdir "$dataoutput/USMetroGridEmp/EmpType_`EmpType'"
		local count = 1	// set counter to one
		* Begin loop by metro to process metro outputs
			foreach USmid of local USMETROIDS{
				di "...Working on employment type `EmpType' and CBSA `USmid', number `count' of `Ncbsafp'..."
				* Import data (columns arrive with the default names v1, v2, ...)
				qui  import delimited "$data_USMETROS/CLUSTER OUTPUT/cells_p${SL}/gridcells_`USmid'_`EmpType'_output.txt", clear 
			* Process variables
				* Columns that are not used further
				qui drop v1
				qui drop v2
				qui drop v5 v3 v4
				* Rescale the four corner coordinates by 180/pi (radians to degrees)
				qui foreach var of varlist v6 v7 v8 v9 {
					replace `var' = `var'*180/_pi
				}
				qui ren  v6 lon_UL
				qui ren  v8 lon_LR
				qui ren  v7 lat_UL
				qui ren  v9 lat_LR
				qui ren v11 cell_MFG_emp 	// emp_mfg_wholesale
				qui ren v12 cell_NT_emp 	// emp_nontradable_services
				qui ren v13 cell_PS_emp 	// emp_public_services
				qui ren v14 cell_TS_emp 	// emp_tradable_services
				qui ren v15 cell_O_emp 		// emp_others
				qui ren v16 cell_ST_emp 	// cell_emp_search_terms
				qui ren v17 cell_total_emp
				qui drop v18
				qui ren v19 clusterID
				qui drop v20 
				qui ren v21 cell_id
				qui drop v22 v23												// dropping developable here since it will be merged from the grid data later on 
			* Generate cbsafp ID from the first underscore-separated part of cell_id
				qui split cell_id , p(_)	
				capture drop cell_id2 											// capture: the part may not exist
				capture drop cell_id3
				qui destring cell_id1, replace force
				qui ren cell_id1 cbsafp
			* Merge legend features
				qui duplicates drop cell_id, force 								// merge 1:1 requires cell_id to be unique
				* Each using variable is listed once; the original list repeated cbsafp and grid_y
				qui merge 1:1 cell_id using "$temp/GridLegend/GRID_`USmid'_final.dta", keepusing(cbsafp square_id metro_id area_g developable lon lat metro_name grid_x grid_y min_x min_y max_x max_y)
				* _merge is written out in full so the script does not depend on variable-name abbreviation being switched on
				qui drop if _merge == 1 // There are cells outside the CBSA since employment is read from NETS in squares and we are merging from the entire grid universe
				qui drop _merge
				* Cells that come only from the grid legend have no corner coordinates from the algorithm output; fill them from the legend
				qui replace lon_UL = min_x if lon_UL == .
				qui replace lat_UL = max_y if lat_UL == .
				qui replace lon_LR = max_x if lon_LR == . 
				qui replace lat_LR = min_y if lat_LR == .
				qui drop  min_x min_y max_x max_y // these corner coordinates just duplicate information and will no longer be needed
				qui foreach var of varlist clusterID cell_*_emp { // zero employment cells are missing in algorithm output
					replace `var' = 0 if `var' == .
				}
			* Save CBSA data
				qui compress // find memory-efficient storage formats
				qui save "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid_`USmid'", replace
				local count = `count'+1
			}
		* Loop by metro to process metro outputs ends
			
		* Append data: stack all metro files of this employment type, starting from an empty data set
		clear
		foreach USmid of local USMETROIDS{
			display "...appending CBSA `USmid', employment type `EmpType' metro-grid data..."
			append using "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid_`USmid'"
		}	
		* Save data sets
		display "..finalizing `EmpType' metro-grid data..."
		qui duplicates drop 													// remove rows that are identical on all variables
		qui save "$dataoutput/USMetroGridEmp/EmpType_`EmpType'/EmpGrid__all.dta", replace
	}
	
* Script ends
	